#!pip install tweepy
#!pip install wordcloud
#!pip install textblob
#!pip install plotly
import tweepy
consumer_key = "QEol1Q74mM83pvTUU0T3y7Jz2"
consumer_secret = "vmYNdrI3TU20btipbfvRmSTYp5LsaoUx9hMBt9Kh8VziZ7Lcgi"
access_token = "1457441520143974401-0j6Pkm3HAXhm0eK7P0JozY3kURqKmL"
access_token_secret = "V7rUOaA63i0RoMBTSDlV5092Wu2ZNOykVI6q6rtTFPxlm"
# Initializing Tweepy API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
api = tweepy.API(auth, wait_on_rate_limit=True)
from notebook.services.config import ConfigManager
cm = ConfigManager().update('notebook', {'limit_output': 10})
import csv
import tweepy
import re
import io
# Open/create a file to append data to
csvFile = open("Oscars.csv",'a')
#Use csv writer
csvWriter = csv.writer(csvFile)
csvWriter.writerow(['username','time', 'tweet_text'])
for tweet in tweepy.Cursor(api.search_tweets,
q = "Oscars",
until = "2022-04-27",
lang = "en").items(6000):
# Write a row to the CSV file. I use encode UTF-8
csvWriter.writerow([tweet.user.screen_name,tweet.created_at, tweet.text.encode('utf-8')])
print ("user_name:",tweet.user.screen_name,
"Time:",tweet.created_at,"Tweeted:",tweet.text)
csvFile.close()
user_name: TheInSneider Time: 2022-04-26 23:59:54+00:00 Tweeted: Nothing is a lock when it comes to the Oscars, especially these days, but there are two performances I've heard are… https://t.co/PjaMkuNCiJ user_name: 444mnesiaa Time: 2022-04-26 23:59:25+00:00 Tweeted: RT @uwaisb_: Will Smith's actions at the Oscars sparked more outrage and condemnation than Israel's current war crimes, this world's concep… user_name: notjeremy_ Time: 2022-04-26 23:59:22+00:00 Tweeted: Amber Heard did a lot more than a slap and something tells me she won’t be banned from the Oscars next year user_name: RonBro66 Time: 2022-04-26 23:59:04+00:00 Tweeted: RT @PageSix: Will Smith travels to India following Oscars slap controversy https://t.co/kFQESoiLf1 https://t.co/Lmg58FTWEd user_name: tasmseb Time: 2022-04-26 23:58:58+00:00 Tweeted: rue euphoria http_daddyy 18+ nsfw nsfwfk help write writing essay essay help sugar daddy sugar mommy spoil baby cas… https://t.co/IwDmSzXfnU user_name: MoofyKitten Time: 2022-04-26 23:58:54+00:00 Tweeted: Much like the Oscars slap bullshit, I am tired of hearing about the Johnny Depp bullshit. Would very much like to s… https://t.co/bIysvuKWj6 user_name: DannyPerez1438 Time: 2022-04-26 23:58:52+00:00 Tweeted: RT @ErikDavis: Notes on Sony’s #CinemaCon presentation: - #AcrossTheSpiderVerse footage was the big winner. Everyone walking out is RAVING… user_name: user45761827 Time: 2022-04-26 23:58:43+00:00 Tweeted: coming to sweep the oscars https://t.co/YoXJARJka3 user_name: BHPReviews Time: 2022-04-26 23:57:52+00:00 Tweeted: RT @CassanovaHefner: This Slap better than the Oscars. HANDS DOWN! 😮💨 https://t.co/VorceYAWmg user_name: TammyBr48675861 Time: 2022-04-26 23:57:34+00:00 Tweeted: @RealBrysonGray CMI yeah didn't Angelina Jolie drink her brother's blood at some Oscars or something user_name: parrotfishie Time: 2022-04-26 23:57:33+00:00 Tweeted: RT @4UMERM4ID: Our celebrity. He's won 20 Oscars and at least 100 Grammys. user_name: lhctm_74 Time: 2022-04-26 23:57:23+00:00 Tweeted: RT @LauraPausini: A year ago I was in Los Angeles waiting for the night of the Oscars. Everyday many magazines asked us for new pictures...… user_name: o_aidee Time: 2022-04-26 23:57:20+00:00 Tweeted: RT @CTilburyMakeup: 💞🚨 NEW! PILLOW TALK BEAUTY SECRETS! 🚨💞 Darlings, Pillow Talk is EVERYONE’S PERFECT SHADE!! My makeup kit for Oscars we… user_name: 333X2C Time: 2022-04-26 23:57:14+00:00 Tweeted: oscars are being handed out as we speak https://t.co/8xnsvcb0Yb user_name: KZu_67 Time: 2022-04-26 23:57:12+00:00 Tweeted: RT @PopBase: Chris Rock’s mom talks about the Oscars slap: “When Will slapped Chris, he slapped all of us. But he really slapped me. Becau… user_name: RobertB51707688 Time: 2022-04-26 23:56:23+00:00 Tweeted: I smell Oscars ❤️ https://t.co/SppjwevuqS user_name: jpurds Time: 2022-04-26 23:56:12+00:00 Tweeted: @oscars_central Only Foxcatcher user_name: emiliascillian Time: 2022-04-26 23:56:07+00:00 Tweeted: I will be watching both but I need to know what the plot for Barbie is so I can campaign both Cillian and Margot’s… https://t.co/rH9OLz6Knl user_name: melchoruh Time: 2022-04-26 23:55:48+00:00 Tweeted: RT @ColIegeStudent: Just like the Oscars https://t.co/QICD5AF0O7 user_name: PersonBeginning Time: 2022-04-26 23:55:32+00:00 Tweeted: @badniggafela You see John David Washington movies, are always confusing. I wonder why it wasn’t nominated for oscars?? user_name: dn_Tb Time: 2022-04-26 23:55:30+00:00 Tweeted: RT @Frame_io: 🎥🍿Congrats to the winners and nominees! See how they did it with our deep dive giving you all the details about what it takes… user_name: madsiesaccount Time: 2022-04-26 23:55:27+00:00 Tweeted: i will never get over the way the oscars slandered encanto with that performance. they literally need to apologize to colombia for that user_name: lein_add Time: 2022-04-26 23:55:01+00:00 Tweeted: RT @CassanovaHefner: This Slap better than the Oscars. HANDS DOWN! 😮💨 https://t.co/VorceYAWmg user_name: rags751 Time: 2022-04-26 23:54:40+00:00 Tweeted: RT @Fandango: A you-had-to-be-there tweet but let Baz Luhrmann host the Oscars. #Cinemacon2022 user_name: beyonceparkwood Time: 2022-04-26 23:54:03+00:00 Tweeted: Today 1 month ago, Beyoncé performed at the Oscars! https://t.co/fgowfsxVd8 user_name: 935KDAY Time: 2022-04-26 23:53:25+00:00 Tweeted: #KrushGroove artist DJ Quik chooses between performing “Hand In Hand” or “Down Down Down”🔥💯👑 • @djquik… https://t.co/CrARzO16Wr user_name: Kittycrotch97 Time: 2022-04-26 23:52:41+00:00 Tweeted: RT @fordhoIden: will smith should go back to the oscars in ten years and just do that to chris rock again user_name: LyssaGrace77 Time: 2022-04-26 23:52:28+00:00 Tweeted: RT @CasIsLoved: castiel's confession trended above us elections. misha collins at the oscars trended above all the a-listers. misha coming… user_name: Fandango Time: 2022-04-26 23:51:33+00:00 Tweeted: A you-had-to-be-there tweet but let Baz Luhrmann host the Oscars. #Cinemacon2022 user_name: yusufelebiaryy Time: 2022-04-26 23:51:20+00:00 Tweeted: and the oscars, the oscars are my super bowl. what would i wear? what would i say in my acceptance speech? would i… https://t.co/L29dNoCx53 user_name: jodaniecarpino Time: 2022-04-26 23:51:19+00:00 Tweeted: @goldenwaves102 @TamaraBraun And as Oscars mom[ am I right] she wouldnt have to come back from the dead. But would… https://t.co/OEnzMkUdw2 user_name: 50one5O Time: 2022-04-26 23:50:57+00:00 Tweeted: RT @ohkayer: her name being engraved into the oscars as we speak user_name: heyhi2016 Time: 2022-04-26 23:50:09+00:00 Tweeted: RT @amorversace: Elsa Hosk wearing vintage Tom Ford for Gucci to the 2019 Vanity Fair post-Oscars party https://t.co/rQjBgWnYr1 user_name: webdesign_asl Time: 2022-04-26 23:50:00+00:00 Tweeted: Hi, I'm a professional #graphicsdesigner. Place an order and get a unique #app icon with fast delivery. We complete… https://t.co/TEDpLeNGrX user_name: omarIoya Time: 2022-04-26 23:49:57+00:00 Tweeted: and amy schumer? somewhere talking about the oscars drama still https://t.co/7a7zSKIjSA user_name: realgoebs Time: 2022-04-26 23:49:53+00:00 Tweeted: Gone with the wind! Disappeared in the slap at the Oscars and war on Disney! https://t.co/eKxSPnmX0W user_name: henjum74 Time: 2022-04-26 23:49:50+00:00 Tweeted: RT @inlauvperry: Cancel culture really is the weirdest thing, Will Smith is cancelled for hitting Chris Rock at the Oscars but Amber Heard… user_name: oceansjonas Time: 2022-04-26 23:49:46+00:00 Tweeted: don’t worry darling coming for oscars user_name: sheiaaa623 Time: 2022-04-26 23:48:55+00:00 Tweeted: RT @stardustRodrigo: @FilmUpdates All the oscars incoming https://t.co/BVO5LhckFB user_name: QuinCunning Time: 2022-04-26 23:47:59+00:00 Tweeted: This is going to SWEEP the Oscars https://t.co/oMV6B6JdbA user_name: cigarette14_ Time: 2022-04-26 23:47:39+00:00 Tweeted: RT @inlauvperry: Cancel culture really is the weirdest thing, Will Smith is cancelled for hitting Chris Rock at the Oscars but Amber Heard… user_name: Sp00kyAngie Time: 2022-04-26 23:47:31+00:00 Tweeted: RT @adorenxtasha: remember when chris evans caught henry cavil eating cookies at oscars 💀 https://t.co/5DvhY4kj77 user_name: hrssantana Time: 2022-04-26 23:47:26+00:00 Tweeted: RT @Dream: I need twitter likes: Will Smith slapped Chris Rock at Oscars user_name: ohkayer Time: 2022-04-26 23:46:54+00:00 Tweeted: her name being engraved into the oscars as we speak https://t.co/GCp7m3fgPX user_name: oscars_central Time: 2022-04-26 23:46:19+00:00 Tweeted: Sooo #FilmTwitter we have #Barbie and #Oppenheimer releasing the same day on 7/21/23…you can only see one, which film are you seeing? user_name: KIWlSANGEL Time: 2022-04-26 23:46:19+00:00 Tweeted: harry and timmy and the oscars together https://t.co/reb3877IfJ user_name: Rex44388079 Time: 2022-04-26 23:46:15+00:00 Tweeted: RT @ivery5000: @Obertson @CheriJacobus @SethAbramson @DanRather @DebraMessing @RepAdamSchiff @RepSwalwell @MiaFarrow @DrDenaGrayson @JillWi… user_name: barnwickgeorge Time: 2022-04-26 23:45:39+00:00 Tweeted: RT @GiannisAMess: Throwback to when Dolly Parton tore it UP at the Oscars singing her Oscar-nominated song “Travelin Thru” about the life a… user_name: artofdominique Time: 2022-04-26 23:45:26+00:00 Tweeted: MY GIRL LOOKS PERFECT THE OSCARS ARE BEING DUSTED OFF AS WE SPEAK https://t.co/eJTUidiAyu user_name: DoctorWillSmith Time: 2022-04-26 23:44:55+00:00 Tweeted: Tell the truth! Tell the truth! #Oscars user_name: valesantilopezz Time: 2022-04-26 23:44:20+00:00 Tweeted: RT @stardustRodrigo: @FilmUpdates All the oscars incoming https://t.co/BVO5LhckFB user_name: BeastlyEsoteric Time: 2022-04-26 23:44:18+00:00 Tweeted: RT @driverminnie: Weird vibe after… so we had a dance. #Oscars https://t.co/XVbR4CKm4r user_name: argfilm Time: 2022-04-26 23:44:06+00:00 Tweeted: RT @stardustRodrigo: @FilmUpdates All the oscars incoming https://t.co/BVO5LhckFB user_name: danieltheedime Time: 2022-04-26 23:44:06+00:00 Tweeted: @FilmUpdates oscars incoming for half of hollywood user_name: goldassfang Time: 2022-04-26 23:43:52+00:00 Tweeted: RT @oscarsclip: Nicole Kidman, 'Rabbit Hole' (Best Actress, 2010) #Oscars #oscarsclip https://t.co/p57eD8eLAP user_name: AgusZapata Time: 2022-04-26 23:43:21+00:00 Tweeted: RT @ErikDavis: Notes on Sony’s #CinemaCon presentation: - #AcrossTheSpiderVerse footage was the big winner. Everyone walking out is RAVING… user_name: ErwinHavranek Time: 2022-04-26 23:43:00+00:00 Tweeted: @caroljsroth The Circus. The Oscars. CNN anchor interview. user_name: PunitaLalla62 Time: 2022-04-26 23:42:28+00:00 Tweeted: RT @billieeilish: Watch Billie's performance of "No Time To Die", the Academy Award-winning @007 theme song, at the 94th #Oscars. @TheAcade… user_name: TPZOMBIE20 Time: 2022-04-26 23:42:19+00:00 Tweeted: RT @CassanovaHefner: This Slap better than the Oscars. HANDS DOWN! 😮💨 https://t.co/VorceYAWmg user_name: TeflonDaDom Time: 2022-04-26 23:42:11+00:00 Tweeted: The Monday after the Oscars wa
import pandas as pd
df=pd.read_csv("Oscars0426.csv")
df.drop_duplicates(subset='tweet_text',inplace=True)
df.head()
| username | time | tweet_text | |
|---|---|---|---|
| 0 | TheInSneider | 2022-04-26 23:59:54+00:00 | b"Nothing is a lock when it comes to the Oscar... |
| 1 | 444mnesiaa | 2022-04-26 23:59:25+00:00 | b"RT @uwaisb_: Will Smith's actions at the Osc... |
| 2 | notjeremy_ | 2022-04-26 23:59:22+00:00 | b'Amber Heard did a lot more than a slap and s... |
| 3 | RonBro66 | 2022-04-26 23:59:04+00:00 | b'RT @PageSix: Will Smith travels to India fol... |
| 4 | tasmseb | 2022-04-26 23:58:58+00:00 | b'rue euphoria http_daddyy 18+ nsfw nsfwfk hel... |
import pandas as pd
import numpy as np
import re
import nltk
nltk.download('stopwords')
stemmer = nltk.SnowballStemmer("english")
from nltk.corpus import stopwords
import string
stopword=set(stopwords.words('english'))
def clean(text):
text = str(text).lower()
text = re.sub('\[.*?\]', '', text)
text = re.sub(r"http\S+", "", text)
text = re.sub('<.*?>+', '', text)
text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
text = re.sub('\n', '', text)
text = re.sub('\w*\d\w*', '', text)
text = re.sub('@[^\s]+','',text)
text = re.sub('RT @[\w_]+:', '', text)
text = re.sub(r'^b','',text)
text = [word for word in text.split(' ') if word not in stopword]
text=" ".join(text)
text = [stemmer.stem(word) for word in text.split(' ')]
text=" ".join(text)
text=' '.join(x for x in text.split() if not x.startswith('#'))
return text
df['tweet_text'] = df['tweet_text'].apply(clean)
[nltk_data] Downloading package stopwords to [nltk_data] /Users/yangning/nltk_data... [nltk_data] Package stopwords is already up-to-date!
#Replace UTF-8 encoding with single-quotes and ellipsis
df['tweet_text'].replace(to_replace = r'\\xe2\\x80\\x99', value = '\'', regex = True, inplace = True)
df['tweet_text'].replace(to_replace = r'\\xe2\\x80\\xa6', value = '...', regex = True, inplace = True)
#Remove all other UTF-8 encoding
df['tweet_text'].replace(to_replace = r'\\x[0-9a-fA-F][0-9a-fA-F]', value = '', regex = True, inplace = True)
df.head()
| username | time | tweet_text | |
|---|---|---|---|
| 0 | TheInSneider | 2022-04-26 23:59:54+00:00 | noth lock come oscar especi day two perform iv... |
| 1 | 444mnesiaa | 2022-04-26 23:59:25+00:00 | rt uwaisb smith action oscar spark outrag cond... |
| 2 | notjeremy_ | 2022-04-26 23:59:22+00:00 | amber heard lot slap someth tell ban oscar nex... |
| 3 | RonBro66 | 2022-04-26 23:59:04+00:00 | rt pagesix smith travel india follow oscar sla... |
| 4 | tasmseb | 2022-04-26 23:58:58+00:00 | rue euphoria nsfw nsfwfk help write write essa... |
SentimentIntensityAnalyzer¶from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob
df[['polarity', 'subjectivity']] = df["tweet_text"].apply(lambda Text: pd.Series(TextBlob(Text).sentiment))
for index, row in df["tweet_text"].iteritems():
score = SentimentIntensityAnalyzer().polarity_scores(row)
neg = score['neg']
neu = score['neu']
pos = score['pos']
comp = score['compound']
if neg > pos:
df.loc[index, 'sentiment'] = "negative"
elif pos > neg:
df.loc[index, 'sentiment'] = "positive"
else:
df.loc[index, 'sentiment'] = "neutral"
df.loc[index, 'Negative'] = neg
df.loc[index, 'Neutral'] = neu
df.loc[index, 'Positive'] = pos
df.loc[index, 'compound'] = comp
df.head(10)
| username | time | tweet_text | polarity | subjectivity | sentiment | Negative | Neutral | Positive | compound | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | TheInSneider | 2022-04-26 23:59:54+00:00 | noth lock come oscar especi day two perform iv... | 0.00 | 0.00 | neutral | 0.000 | 1.000 | 0.000 | 0.0000 |
| 1 | 444mnesiaa | 2022-04-26 23:59:25+00:00 | rt uwaisb smith action oscar spark outrag cond... | 0.05 | 0.25 | negative | 0.478 | 0.431 | 0.091 | -0.8442 |
| 2 | notjeremy_ | 2022-04-26 23:59:22+00:00 | amber heard lot slap someth tell ban oscar nex... | 0.00 | 0.00 | negative | 0.273 | 0.606 | 0.121 | -0.4588 |
| 3 | RonBro66 | 2022-04-26 23:59:04+00:00 | rt pagesix smith travel india follow oscar sla... | 0.00 | 0.00 | positive | 0.000 | 0.833 | 0.167 | 0.1531 |
| 4 | tasmseb | 2022-04-26 23:58:58+00:00 | rue euphoria nsfw nsfwfk help write write essa... | 0.00 | 0.00 | positive | 0.000 | 0.573 | 0.427 | 0.8658 |
| 5 | MoofyKitten | 2022-04-26 23:58:54+00:00 | much like oscar slap bullshit tire hear johnni... | 0.20 | 0.20 | negative | 0.342 | 0.360 | 0.297 | -0.4588 |
| 6 | DannyPerez1438 | 2022-04-26 23:58:52+00:00 | rt erikdavi note cinemacon present n acrossthe... | 0.00 | 0.05 | positive | 0.000 | 0.725 | 0.275 | 0.5859 |
| 7 | user45761827 | 2022-04-26 23:58:43+00:00 | come sweep oscar | 0.00 | 0.00 | neutral | 0.000 | 1.000 | 0.000 | 0.0000 |
| 8 | BHPReviews | 2022-04-26 23:57:52+00:00 | rt cassanovahefn slap better oscar hand | 0.50 | 0.50 | positive | 0.000 | 0.280 | 0.720 | 0.7717 |
| 9 | TammyBr48675861 | 2022-04-26 23:57:34+00:00 | realbrysongray cmi yeah didnt angelina joli dr... | 0.00 | 0.00 | positive | 0.000 | 0.820 | 0.180 | 0.2960 |
df1 = df[["tweet_text", "Positive", "Negative", "Neutral"]]
x = sum(df1["Positive"])
y = sum(df1["Negative"])
z = sum(df1["Neutral"])
def sentiment_score(a, b, c):
if (a>b) and (a>c):
print("Positive 😊 ")
elif (b>a) and (b>c):
print("Negative 😠 ")
else:
print("Neutral 🙂 ")
sentiment_score(x, y, z)
Neutral 🙂
print("Positive: ", x)
print("Negative: ", y)
print("Neutral: ", z)
Positive: 338.78500000000065 Negative: 119.99999999999996 Neutral: 1662.213000000004
from matplotlib import pyplot as plt
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.axis('equal')
langs = ['Positive', 'Negative', 'Neutral']
senti_value = [x,y,z]
ax.pie(senti_value, labels = langs,autopct='%1.2f%%')
plt.show()
def count_values_in_column(data,feature):
total=data.loc[:,feature].value_counts(dropna=False)
percentage=round(data.loc[:,feature].value_counts(dropna=False,normalize=True)*100,2)
return pd.concat([total,percentage],axis=1,keys=['Total','Percentage'])
#Count_values for sentiment
count_values_in_column(df,"sentiment")
| Total | Percentage | |
|---|---|---|
| positive | 1041 | 49.06 |
| neutral | 752 | 35.44 |
| negative | 329 | 15.50 |
def create_wordcloud(text):
mask = np.array(Image.open('cloud.png'))
stopwords = set(STOPWORDS)
wc = WordCloud(background_color='white',
mask = mask,
max_words=3000,
stopwords=stopwords,
repeat=True)
wc.generate(str(text))
wc.to_file('wc.png')
print('Word Cloud Saved Successfully')
path='wc.png'
display(Image.open(path))
from wordcloud import WordCloud, STOPWORDS
from PIL import Image
#Creating wordcloud for all tweets
create_wordcloud(df1['tweet_text'].values)
Word Cloud Saved Successfully
tw_list_negative = df[df["sentiment"]=="negative"]
tw_list_positive = df[df["sentiment"]=="positive"]
tw_list_neutral = df[df["sentiment"]=="neutral"]
#Creating wordcloud for negative tweets
create_wordcloud(tw_list_negative['tweet_text'].values)
Word Cloud Saved Successfully
#Creating wordcloud for positive tweets
create_wordcloud(tw_list_positive['tweet_text'].values)
Word Cloud Saved Successfully
create_wordcloud(tw_list_neutral['tweet_text'].values)
Word Cloud Saved Successfully
df2=df
df2["id"] = df2.index + 1
df2['time'] = pd.to_datetime(df2['time'], errors='coerce')
df2['time_hour'] = df2['time'].dt.round('H')
aggregation = {'count': ('id','count')}
df_sentiment_by_time = df2.groupby(['time_hour', 'sentiment']).agg(**aggregation).reset_index()
df_sentiment_by_time
| time_hour | sentiment | count | |
|---|---|---|---|
| 0 | 2022-04-26 00:00:00+00:00 | negative | 7 |
| 1 | 2022-04-26 00:00:00+00:00 | neutral | 16 |
| 2 | 2022-04-26 00:00:00+00:00 | positive | 27 |
| 3 | 2022-04-26 01:00:00+00:00 | negative | 20 |
| 4 | 2022-04-26 01:00:00+00:00 | neutral | 32 |
| ... | ... | ... | ... |
| 70 | 2022-04-26 23:00:00+00:00 | neutral | 20 |
| 71 | 2022-04-26 23:00:00+00:00 | positive | 45 |
| 72 | 2022-04-27 00:00:00+00:00 | negative | 11 |
| 73 | 2022-04-27 00:00:00+00:00 | neutral | 39 |
| 74 | 2022-04-27 00:00:00+00:00 | positive | 21 |
75 rows × 3 columns
import plotly.io as pio
pio.renderers.default='notebook'
import plotly.express as px
fig = px.line(df_sentiment_by_time, x="time_hour", y="count", color="sentiment",title="Sentiment by the hour of day")
fig.show()
df_token=df2
#Appliyng Countvectorizer
from sklearn.feature_extraction.text import CountVectorizer
countVector = countVectorizer.fit_transform(df_token['tweet_text'])
count_vect_df = pd.DataFrame(countVector.toarray(), columns=countVectorizer.get_feature_names())
# Most Used Words
count = pd.DataFrame(count_vect_df.sum())
countdf = count.sort_values(0,ascending=False).head(20)
countdf[0:11]
| 0 | |
|---|---|
| oscar | 1439 |
| rt | 640 |
| smith | 404 |
| slap | 352 |
| chri | 267 |
| rock | 211 |
| best | 124 |
| like | 110 |
| movi | 109 |
| year | 106 |
| one | 104 |
#Function to ngram
def get_top_n_gram(corpus,ngram_range,n=None):
vec = CountVectorizer(ngram_range=ngram_range,stop_words = 'english').fit(corpus)
bag_of_words = vec.transform(corpus)
sum_words = bag_of_words.sum(axis=0)
words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
words_freq =sorted(words_freq, key = lambda x: x[1], reverse=True)
return words_freq[:n]
#n2_bigram
n2_bigrams = get_top_n_gram(df_token['tweet_text'],(2,2),20)
n2_bigrams
[('chris rock', 189),
('oscar slap', 183),
('rock oscar', 81),
('smith slap', 70),
('smith oscar', 52),
('apolog chris', 52),
('travel india', 49),
('person apolog', 43),
('smith person', 41),
('slap chris', 40),
('smith travel', 38),
('win oscar', 36),
('rock mom', 36),
('month oscar', 34),
('chris mom', 33),
('slap report', 31),
('india spiritu', 30),
('slap son', 29),
('oscar year', 27),
('follow oscar', 26)]
#n3_trigram
n3_trigrams = get_top_n_gram(df2['tweet_text'],(3,3),20)
n3_trigrams
[('chris rock oscar', 80),
('apolog chris rock', 52),
('rock oscar slap', 48),
('person apolog chris', 43),
('smith person apolog', 40),
('smith travel india', 36),
('chris rock mom', 35),
('oscar slap report', 31),
('smith slap son', 29),
('slap chris rock', 28),
('slap son oscar', 26),
('american cinema editor', 25),
('month oscar slap', 24),
('travel india spiritu', 24),
('smith slap chris', 23),
('india spiritu purpos', 23),
('follow oscar slap', 22),
('jada pinkett smith', 21),
('practic yoga medit', 19),
('spiritu purpos month', 18)]